# Required Packages
import pandas as pd
import numpy as np
import pickle
# preprocessing
from sklearn.impute import SimpleImputer
# Visualisation libraries
## Text
from colorama import Fore, Back, Style
from IPython.display import Image, display, Markdown, Latex, clear_output
## progress bar
import progressbar
## plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
## seaborn
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("paper", rc={"font.size":12,"axes.titlesize":14,"axes.labelsize":12})
## matplotlib
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from matplotlib.patches import Ellipse, Polygon
import matplotlib.gridspec as gridspec
import matplotlib.colors
from pylab import rcParams
from matplotlib.font_manager import FontProperties
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
plt.style.use('seaborn-whitegrid')
import matplotlib as mpl
mpl.rcParams['figure.figsize'] = (17, 6)
mpl.rcParams['axes.labelsize'] = 14
mpl.rcParams['xtick.labelsize'] = 12
mpl.rcParams['ytick.labelsize'] = 12
mpl.rcParams['text.color'] = 'k'
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
![]()
In this article, we investigate a simulated dataset that mimics customer behavior on the Starbucks rewards mobile app. Starbucks tends to send out offers to users of the mobile app once every few days. These offers are exclusive; that is, not all users receive the same offer. An offer can contain a discount on products or sometimes a BOGO (buy one, get one free) deal. Each offer has a validity period, after which it expires. This article is inspired by an article on towardsdatascience.com.
def Header(Text, L = 100, C = 'Blue', T = 'White'):
    """Print ``Text`` as a coloured caption followed by a rule of '=' characters.

    Text : caption to print.
    L    : target total width; the rule is ``L - len(Text) - 1`` characters long.
    C    : colour name used for the caption background and for the rule.
    T    : colour name used for the caption text.

    Raises KeyError when a colour name is missing from the lookup tables
    (note that 'White' is available as a text colour only).
    """
    back_names = ('Black', 'Red', 'Green', 'Yellow', 'Blue', 'Magenta', 'Cyan')
    fore_names = back_names + ('White',)
    backgrounds = {name: getattr(Back, name.upper()) for name in back_names}
    foregrounds = {name: getattr(Fore, name.upper()) for name in fore_names}
    caption = backgrounds[C] + foregrounds[T] + Style.NORMAL + Text + Style.RESET_ALL
    rule = foregrounds[C] + Style.NORMAL + '=' * (L - len(Text) - 1) + Style.RESET_ALL
    print(caption + ' ' + rule)
def Line(L=100, C = 'Blue'):
    """Print a rule made of ``L`` '=' characters in the text colour named ``C``.

    Raises KeyError when ``C`` is not one of the colour names listed below.
    """
    palette = dict(Black=Fore.BLACK, Red=Fore.RED, Green=Fore.GREEN, Yellow=Fore.YELLOW,
                   Blue=Fore.BLUE, Magenta=Fore.MAGENTA, Cyan=Fore.CYAN, White=Fore.WHITE)
    colour_code = palette[C]
    print(''.join([colour_code, Style.NORMAL, '=' * L, Style.RESET_ALL]))
# Load the three cleaned source tables; for each one print a coloured header
# and show its first rows (index hidden).
_previews = (
    ('Portfolio Dataset:', 'Blue', 'StarBucks/Portfolio_Clean.csv'),
    ('Profile Dataset:', 'Green', 'StarBucks/Profile_Clean.csv'),
    ('Transcript Dataset:', 'Red', 'StarBucks/Transcript_Clean.csv'),
)
_loaded = []
for _caption, _colour, _path in _previews:
    Header(_caption, C = _colour)
    _table = pd.read_csv(_path)
    display(_table.head().style.hide_index())
    _loaded.append(_table)
Portfolio, Profile, Transcript = _loaded
Line()

# Pre-aggregated tables used by the later analysis cells.
User_Data = pd.read_csv('StarBucks/User_Data.csv')
Data = pd.read_csv('StarBucks/Data.csv')

# Dictionaries
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# a Feat_Dict.pkl that comes from a trusted source.
with open('StarBucks/Feat_Dict.pkl', 'rb') as handle:
    Feat_Dict = pickle.load(handle)
Portfolio Dataset: =================================================================================
| Reward | Difficulty | Duration | Offer Type | Offer ID | Mobile | Social | Web | |
|---|---|---|---|---|---|---|---|---|
| 10 | 10 | 7 | BOGO | ae264e3637204a6fb9bb56bc8210ddfd | 1 | 1 | 1 | 0 |
| 10 | 10 | 5 | BOGO | 4d5c57ea9a6940dd891ad53e9dbe8da0 | 1 | 1 | 1 | 1 |
| 0 | 0 | 4 | Informational | 3f207df678b143eea3cee63160fa8bed | 1 | 1 | 0 | 1 |
| 5 | 5 | 7 | BOGO | 9b98b8c7a33c4b65b9aebfe6a799e6d9 | 1 | 1 | 0 | 1 |
| 5 | 20 | 10 | Discount | 0b1e1539f2cc45b7b9fa7c272da2e1d7 | 1 | 0 | 0 | 1 |
Profile Dataset: ===================================================================================
| Gender | Age | ID | Became Member On | Income | Member Since Year | Member Tenure |
|---|---|---|---|---|---|---|
| Other | 55 | 68be06ca386d4c31939f3a4f0e3dd783 | 2017-02-12 | 64000 | 2017 | 23 |
| Female | 55 | 0610b486422d4921ae7d2bf64640c50b | 2017-07-15 | 112000 | 2017 | 18 |
| Other | 55 | 38fe809add3b4fcf9315a9694bb96ff5 | 2018-07-12 | 64000 | 2018 | 6 |
| Female | 75 | 78afa995795e4d85b5d9ceeca43f5fef | 2017-05-09 | 100000 | 2017 | 20 |
| Other | 55 | a03223e636434f42ac4c3df47e8bac43 | 2017-08-04 | 64000 | 2017 | 17 |
Transcript Dataset: ================================================================================
| Person | Event | Value | Time | Amount | Reward | Offer ID |
|---|---|---|---|---|---|---|
| 78afa995795e4d85b5d9ceeca43f5fef | Offer Received | {'offer id': '9b98b8c7a33c4b65b9aebfe6a799e6d9'} | 0 | nan | nan | 9b98b8c7a33c4b65b9aebfe6a799e6d9 |
| a03223e636434f42ac4c3df47e8bac43 | Offer Received | {'offer id': '0b1e1539f2cc45b7b9fa7c272da2e1d7'} | 0 | nan | nan | 0b1e1539f2cc45b7b9fa7c272da2e1d7 |
| e2127556f4f64592b11af22de27a7932 | Offer Received | {'offer id': '2906b810c7d4411798c6938adc9daaa5'} | 0 | nan | nan | 2906b810c7d4411798c6938adc9daaa5 |
| 8ec6ce2a7e7949b1bf142def7d0e0586 | Offer Received | {'offer id': 'fafdcd668e3743c1bb461111dcafc2a4'} | 0 | nan | nan | fafdcd668e3743c1bb461111dcafc2a4 |
| 68617ca6246f4fbc85e91a2a49552598 | Offer Received | {'offer id': '4d5c57ea9a6940dd891ad53e9dbe8da0'} | 0 | nan | nan | 4d5c57ea9a6940dd891ad53e9dbe8da0 |
====================================================================================================
| Feature | Description |
|---|---|
| Reward (int) | Given reward for completing an offer |
| Channels (list of strings) | Email, mobile app, social media, etc |
| Difficulty (int) | Minimum spending requirement for completing an offer |
| Duration (int) | Time that an offer is valid |
| Offer_Type (string) | Type of offer |
| ID (string) | Offer ID |
| Feature | Description |
|---|---|
| Gender (str) | Customers gender |
| Age (int) | Customers age |
| ID (str) | Customers ID |
| Became_Member_On (int) | Date of membership |
| Income (float) | Customer's income |
| Feature | Description |
|---|---|
| Person (str) | Customer ID |
| Event (str) | Record description |
| time (int) | Time in hours (since the beginning of the study) |
| Value - (dict of strings) | Offer ID or transaction amount |
# Per offer type, sum the channel indicator columns, then reshape to one row
# per (offer type, channel) pair for plotting.
_channel_cols = ['Email', 'Mobile', 'Social', 'Web']
Group = Portfolio.groupby('Offer Type')[_channel_cols].sum()
Group = Group.reset_index().melt(id_vars=['Offer Type'], value_vars=_channel_cols,
                                 var_name='Channels', value_name='Count')
# Share of each pair in the grand total of channel counts.
Group['Percentage'] = Group['Count'].mul(100).div(Group['Count'].sum()).round(2)

_blues = ['LightBlue', 'DeepSkyBlue', 'CornFlowerBlue', 'RoyalBlue', 'MediumBlue', 'Navy']
fig = px.bar(Group, x='Offer Type', y='Count', orientation='v', color='Channels',
             barmode='group', color_discrete_sequence=_blues, text='Percentage',
             width=800, height=500)
fig.update_traces(marker_line_color='Black', marker_line_width=0.8, opacity=1,
                  texttemplate=' %{text:.2}%', textposition='inside')

# Frame settings shared by both axes; only zero line, grid and range differ.
_axis_frame = dict(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                   zerolinewidth=1, zerolinecolor='Black',
                   gridwidth=1, gridcolor='Lightgray')
fig.update_xaxes(zeroline=False, showgrid=False, **_axis_frame)
fig.update_yaxes(zeroline=True, showgrid=True, range=[0, 5], **_axis_frame)

_title = dict(text='<b>Channel Distribution for Various Offers<b>',
              x=0.5, y=0.91, xanchor='center', yanchor='top')
fig.update_layout(plot_bgcolor='white', title=_title)
fig.show()
del Group
It seems that social media is the least efficient channel among all.
def FeatDispPlot(Feat, nbins = 10, Color = 'GreenYellow', LC = 'Black',
                 xLim = None, yLim = [0, 6e3], H = 450, titleY = 0.92, Inp = Profile):
    """Show a histogram (with a marginal box plot) of column ``Feat`` of ``Inp``.

    Dotted vertical lines mark the median and the mean of the column.

    Feat   : name of the column to plot.
    nbins  : number of bins passed to ``px.histogram``.
    Color  : fill colour of the bars.
    LC     : colour of the marker outlines.
    xLim   : optional ``[min, max]`` for the x axis; left automatic when None.
    yLim   : ``[min, max]`` for the y axis; also the extent of the
             median/mean lines. The default list is never mutated.
    H      : figure height in pixels.
    titleY : vertical position of the title.
    Inp    : DataFrame holding the column.
    """
    fig = px.histogram(Inp, x = Feat, nbins = nbins, marginal = 'box',
                       color_discrete_sequence = [Color], hover_data = [Feat])
    axis_frame = dict(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                      zerolinewidth=1, zerolinecolor='Black',
                      gridwidth=1, gridcolor='Lightgray')
    fig.update_xaxes(zeroline=False, showgrid=False, **axis_frame)
    fig.update_yaxes(zeroline=True, showgrid=True, **axis_frame)

    # A vertical line needs only its two end points. mode='lines' is set
    # explicitly because plotly adds markers by default to short traces.
    y_span = [yLim[0], yLim[1]]
    references = (('Median', Inp[Feat].median(), 'RoyalBlue'),
                  ('Mean', Inp[Feat].mean(), 'Red'))
    for label, value, colour in references:
        fig.add_trace(go.Scatter(x = [value, value], y = y_span, mode = 'lines', name = label,
                                 line = dict(color = colour, width = 2, dash = 'dot')))

    Name = '%s Distribution' % Feat
    fig.update_layout(legend_orientation='v', plot_bgcolor= 'white', height= H, width= 900,
                      title={'text': '<b>' + Name + '<b>', 'x':0.5, 'y': titleY,
                             'xanchor': 'center', 'yanchor': 'top'},
                      yaxis_title='Frequency')
    fig.update_traces(marker_line_color= LC, marker_line_width=0.5, opacity=1)
    fig['layout']['yaxis'].update(range=yLim)
    # Identity test: "== None" is ambiguous for array-like limits.
    if xLim is not None:
        fig['layout']['xaxis'].update(range=xLim)
    fig.show()
# Age distribution of the profile table (all other settings at their defaults).
FeatDispPlot(Feat='Age')
Customers are mainly from the 50-60 age group.
# Income distribution, with explicit axis ranges.
FeatDispPlot(Feat='Income', Color='LightSkyblue', xLim=[2e4, 14e4], yLim=[0, 5e3])
Most of the customers' income is around 60K.
def FeatDispPlot2(Feat, LC = 'Black',
                  Colors = ['HoneyDew','GreenYellow', 'Bisque','LightSalmon','Plum', 'LightSkyBlue'],
                  yLim = [0, 9e3], H = 450, titleY = 0.92, Inp = Profile):
    """Show the value counts of column ``Feat`` of ``Inp`` as a bar chart and a pie chart.

    Feat   : name of the categorical column to summarise.
    LC     : outline colour of bars and pie slices.
    Colors : fill colours, one per category. The default list is never mutated.
    yLim   : ``[min, max]`` of the bar chart's y axis.
    H      : figure height in pixels.
    titleY : vertical position of the title.
    Inp    : DataFrame holding the column.

    ``H`` and ``titleY`` were previously accepted but ignored (the title
    position was hard-coded); they are now applied to the layout.
    """
    # One row per category with its count and its share of all rows.
    Group = Inp.groupby([Feat])[Feat].agg(['count']).rename(columns = {'count':'Count'})
    Group['Percentage'] = np.round(100 * Group['Count'] / Group['Count'].sum(), 2)
    Group.reset_index(drop = False, inplace = True)

    fig = make_subplots(rows=1, cols=2, specs=[[{"type": "xy"}, {'type':'domain'}]])
    # Left: bar chart of the counts
    fig.add_trace(go.Bar(x= Group[Feat].values, y= Group['Count'].values, marker_color= Colors,
                         textposition='inside', showlegend = False,
                         hovertext=list(Group['Percentage'])), 1, 1)
    fig.update_layout(plot_bgcolor= 'white')
    fig.update_traces(marker_line_color= LC, marker_line_width=1, opacity=1, row=1, col=1)
    axis_frame = dict(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                      zerolinewidth=1, zerolinecolor='Black',
                      gridwidth=1, gridcolor='Lightgray')
    fig.update_xaxes(zeroline=False, showgrid=False, **axis_frame)
    fig.update_yaxes(zeroline=True, showgrid=True, range= yLim, **axis_frame)
    # Right: pie chart of the same counts
    fig.add_trace(go.Pie(labels= Group[Feat].values, values= Group['Count'].values,
                         textfont=dict(size=16),
                         marker=dict(colors = Colors, line=dict(color='black', width=1))), 1, 2)
    fig.update_traces(marker_line_color= LC, marker_line_width=1, opacity=1, row=1, col=2)
    fig.update_layout(plot_bgcolor= 'white', width = 900, height = H,
                      title={'text': '<b>' + '%s Distribution' % Feat + '<b>',
                             'x':0.5, 'y': titleY, 'xanchor': 'center', 'yanchor': 'top'})
    fig.show()
# Gender distribution, one colour per gender category.
FeatDispPlot2(Feat='Gender', Colors=['LightPink', 'CornFlowerBlue', 'LightGreen'])
The number of male customers is larger than the number of female customers.
# Distribution of the year in which customers became members.
FeatDispPlot2(Feat='Member Since Year', yLim=[0, 7e3])
# Head count per (membership year, gender) pair and each pair's share of all rows.
Group = (Profile.groupby(['Member Since Year', 'Gender'])['Gender']
         .agg({'count'})
         .rename(columns={'count': 'Count'}))
Group['Percentage'] = Group['Count'].mul(100).div(Group['Count'].sum()).round(2)
Group = Group.reset_index()
# Years as strings so that plotly treats the axis as categorical.
Group['Member Since Year'] = Group['Member Since Year'].astype(str)
Group.columns = [name.replace('_', ' ') for name in Group.columns]

Colors = ['LightPink','CornFlowerBlue','LightGreen']
fig = px.bar(Group, y='Member Since Year', x='Percentage', orientation='h', color='Gender',
             text='Percentage', color_discrete_sequence=Colors,
             hover_data=Group.columns, height=450)
fig.update_traces(marker_line_color='Navy', marker_line_width=1, opacity=1,
                  texttemplate='%{text:.2}%', textposition='inside')
fig.update_layout(plot_bgcolor='white')
fig.update_traces(marker_line_color='Black', marker_line_width=1, opacity=1, row=1, col=1)

# Frame settings shared by both axes; only zero line, grid and range differ.
_axis_frame = dict(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                   zerolinewidth=1, zerolinecolor='Black',
                   gridwidth=1, gridcolor='Lightgray')
fig.update_xaxes(zeroline=True, showgrid=True, range=[0, 40], **_axis_frame)
fig.update_yaxes(zeroline=False, showgrid=False, **_axis_frame)

fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_layout(plot_bgcolor='white', width=950,
                  title=dict(text='<b>Membership Growth Over Year by Gender<b>',
                             x=0.5, y=0.95, xanchor='center', yanchor='top'))
fig.show()
del Group
Assuming the data is a snapshot taken at the end of 2018, member tenure is measured as the number of months of membership.
# Tenure and gender only, dropping rows where either is missing.
Group = Profile[['Member Tenure', 'Gender']].dropna()
Group.columns = [name.replace('_', ' ') for name in Group.columns]

Colors = ['LightPink','CornFlowerBlue','LightGreen']
fig = px.histogram(Group, x='Member Tenure', color='Gender', nbins=100, marginal='box',
                   color_discrete_sequence=Colors, hover_data=Group.columns)
fig.update_layout(plot_bgcolor='white')
fig.update_traces(marker_line_color='Navy', marker_line_width=1, opacity=1)

_frame = dict(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_xaxes(**_frame)
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='Lightgray', **_frame)

# Fixed y range; x range padded by one unit on each side of the observed tenures.
_tenure = Group['Member Tenure']
fig['layout']['yaxis'].update(range=[0, 1000])
fig['layout']['xaxis'].update(range=[_tenure.min() - 1, _tenure.max() + 1])

fig.update_layout(plot_bgcolor='white', width=950,
                  title=dict(text='<b>Member Tenure Distribution by Gender<b>',
                             x=0.5, y=0.95, xanchor='center', yanchor='top'))
fig.show()
del Group
# Number of transcript records per event type.
Summary_Transcript = pd.DataFrame(Transcript.groupby(['Event'])['Event'].count())
Summary_Transcript.columns = ['Count']
display(Summary_Transcript.T)

# Funnel ratios in percent: completed / viewed and viewed / received.
data = {'Completed': [100*Summary_Transcript.loc['Offer Completed','Count']/Summary_Transcript.loc['Offer Viewed','Count']],
        'Viewed':[100*Summary_Transcript.loc['Offer Viewed','Count']/Summary_Transcript.loc['Offer Received','Count']]}
Group = pd.DataFrame(data = data).round(2).T
Group.columns = ['Percentage']
display(Group.T)

# constrained_layout is not requested here: it is incompatible with the
# subplots_adjust call below (depending on the matplotlib version, one of the
# two is silently dropped). Spacing is controlled by subplots_adjust alone.
fig = plt.figure(figsize=(15, 6))
gs = fig.add_gridspec(1, 5)
ax0 = fig.add_subplot(gs[:-1])   # wide panel: counts per event
ax1 = fig.add_subplot(gs[-1])    # narrow panel: funnel percentages

_ = sns.barplot(ax = ax0, y="Event", x="Count", palette='Purples',
                edgecolor='k', hatch="///", data=Summary_Transcript.reset_index(drop = False))
_ = ax0.set_xlim([0, 14e4])
_ = ax0.set_xlabel('Count', fontsize = 14)

_ = sns.barplot(ax = ax1, x="index", y="Percentage", palette='Blues',
                edgecolor='k', hatch="///", data=Group.reset_index(drop = False))
# Explicit empty label instead of None, which some matplotlib versions render as text.
_ = ax1.set_xlabel('', fontsize = 14)
_ = ax1.set_ylim([0,100])
_ = ax1.set_yticks(np.arange(0, 101, 10))
plt.subplots_adjust(wspace= 0.5)
| Event | Offer Completed | Offer Received | Offer Viewed | Transaction |
|---|---|---|---|---|
| Count | 33579 | 76277 | 57725 | 138953 |
| Completed | Viewed | |
|---|---|---|
| Percentage | 58.17 | 75.68 |
# Record count per (event, offer type) pair and each pair's share of all counted rows.
Group = (Data.groupby(['Event', 'Offer Type'])['Offer Type']
         .agg({'count'})
         .rename(columns={'count': 'Count'}))
Group['Percentage'] = Group['Count'].mul(100).div(Group['Count'].sum()).round(2)
Group = Group.reset_index()

Colors = ['GreenYellow','LightSalmon','LightSkyBlue']
fig = px.bar(Group, y='Offer Type', x='Percentage', orientation='h', color='Event',
             text='Percentage', color_discrete_sequence=Colors,
             hover_data=Group.columns, height=350)
fig.update_traces(marker_line_color='Navy', marker_line_width=1, opacity=1,
                  texttemplate='%{text:.2}%', textposition='inside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_layout(plot_bgcolor='white')

# Frame settings shared by both axes; zero line, grid, range and title differ.
_axis_frame = dict(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                   zerolinewidth=1, zerolinecolor='Black',
                   gridwidth=1, gridcolor='Lightgray')
fig.update_xaxes(zeroline=True, showgrid=True, range=[0, 50], **_axis_frame)
fig.update_yaxes(zeroline=False, showgrid=False, title_text='Offer Type', **_axis_frame)

fig.update_layout(plot_bgcolor='white', width=950,
                  title=dict(text='<b>Event by Offer Type<b>',
                             x=0.5, y=0.92, xanchor='center', yanchor='top'))
fig.show()
del Group
First off, the diagram below shows the sequence of events in a customer's journey.
# Four journey stages chained by three links: stage k flows into stage k + 1.
# The source, target and value lists must have equal length. The previous
# version listed a fourth source and value with no matching target.
fig = go.Figure(go.Sankey(arrangement = "snap",
    node = {"label": ['Offer Received', 'Offer Viewed', 'Transaction', 'Offer Completed'],
            "x": [0, 0, 0, 0], "y": [0, 1, 2, 3], 'pad':10},
    link = {"source": [0, 1, 2], "target": [1, 2, 3], "value": [.5, .5, .5]}))
fig.update_layout(height = 200, title={'text': '<b>' + """A Customer's Journey""" + '<b>',
                                       'x':0.5, 'y':0.92, 'xanchor': 'center', 'yanchor': 'top'})
fig.show()
Now, let's focus on those customers that completed an offer.
# Person IDs of every 'Offer Completed' record, one entry per record.
Ofer_Completed_List = Data.loc[Data['Event'].eq('Offer Completed'), 'Person'].tolist()
The journey of one of these customers can be analyzed as well. For example, consider the first customer from the above list:
# All records of the first customer in the list, ordered by row index.
Customer_Journey = Data[Data['Person'] == Ofer_Completed_List[0]].sort_index()
# Preview of the first six records with the index column hidden.
Customer_Journey.iloc[:6].style.hide_index()
| Person | Event | Time | Amount | Reward Received | Offer ID | Reward Defined | Difficulty | Duration | Offer Type | Mobile | Social | Web | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 9fa9ae8f57894cc9a3b8a9bbe0fc1b2f | Offer Received | 0 | nan | nan | 2906b810c7d4411798c6938adc9daaa5 | 2.000000 | 10.000000 | 7.000000 | Discount | 1.000000 | 1.000000 | 0.000000 | 1.000000 |
| 9fa9ae8f57894cc9a3b8a9bbe0fc1b2f | Offer Viewed | 0 | nan | nan | 2906b810c7d4411798c6938adc9daaa5 | 2.000000 | 10.000000 | 7.000000 | Discount | 1.000000 | 1.000000 | 0.000000 | 1.000000 |
| 9fa9ae8f57894cc9a3b8a9bbe0fc1b2f | Transaction | 0 | 34.560000 | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan |
| 9fa9ae8f57894cc9a3b8a9bbe0fc1b2f | Offer Completed | 0 | nan | 2.000000 | 2906b810c7d4411798c6938adc9daaa5 | 2.000000 | 10.000000 | 7.000000 | Discount | 1.000000 | 1.000000 | 0.000000 | 1.000000 |
| 9fa9ae8f57894cc9a3b8a9bbe0fc1b2f | Transaction | 42 | 21.550000 | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan |
| 9fa9ae8f57894cc9a3b8a9bbe0fc1b2f | Transaction | 114 | 32.870000 | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan |
To produce the plots for this part, we need to calculate the transaction amount without any offers and the number of transactions without any offers.
# Transaction amount not tied to an offer: total amount minus offer-driven amount.
User_Data['Tran Amnt No Offer'] = User_Data['Tot Tran Amnt'].sub(User_Data['Offer Trans Amnt'])
# Transaction count not tied to an offer: total count minus completed offers.
User_Data['Trans Cnt No Offer'] = User_Data['Tot Tran Cnt'].sub(User_Data['Offer Completed'])
# Cast the three offer-type columns to integers.
_offer_flags = ['BOGO offer', 'Disc offer', 'Info offer']
User_Data[_offer_flags] = User_Data[_offer_flags].astype(int)
In the following tables and plots, the triple (BOGO Offer, Discount Offer, Informational Offer) indicates which offer types have been used. For example, (1,1,0) denotes customers who responded to BOGO and Discount offers but not to Informational offers.
def Mean_by_Offer_Type (Columns):
    """Return the mean of each column in ``Columns`` per offer-type combination.

    Rows of the global ``User_Data`` are grouped by the three columns
    'BOGO offer', 'Disc offer' and 'Info offer'. The result has one row per
    combination present in the data, with the three grouping columns first
    and the requested columns after them, in the order given.

    Columns : sequence of column names of ``User_Data``.

    Raises IndexError when ``Columns`` is empty.
    """
    Cols = ['BOGO offer','Disc offer','Info offer']
    if len(Columns) == 0:
        raise IndexError('Columns must contain at least one column name')
    # One grouped mean over all requested columns replaces a separate
    # groupby per column followed by merges on the grouping keys.
    return User_Data.groupby(Cols)[list(Columns)].mean().reset_index()
# Average ratio metrics per offer-type combination, shown as a table and a bar chart.
Column_List = ['Offer Tran Cnt Ratio', 'Offer Trans Amnt Ratio', 'Offer Comp View Ratio', 'Offer Comp Rec Ratio']
_keys = ['BOGO offer', 'Disc offer', 'Info offer']
Group = Mean_by_Offer_Type(Column_List).set_index(_keys)
display(Group.round(4))

Colors = ['GreenYellow', 'Bisque','LightSalmon','Plum', 'LightSkyBlue']
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 7), sharex=False)
_ = Group.plot.bar(ax=ax, rot=0, edgecolor='Navy', color=Colors, hatch='//')
_ = ax.set_title('Metric Average of Offer Type Combinations')
# Legend entries use the display names stored in Feat_Dict.
_legend_names = [Feat_Dict[column] for column in Column_List]
_ = ax.legend(_legend_names, loc='upper left', fontsize=12)
_ = ax.set_xlabel('(BOGO Offer, Discount Offer, Informational Offer)')
_ = ax.set_ylim([0, 1])
| Offer Tran Cnt Ratio | Offer Trans Amnt Ratio | Offer Comp View Ratio | Offer Comp Rec Ratio | |||
|---|---|---|---|---|---|---|
| BOGO offer | Disc offer | Info offer | ||||
| 0 | 0 | 0 | 0.0000 | 0.0000 | 0.0000 | 0.0000 |
| 1 | 0.2110 | 0.3135 | 0.2807 | 0.2892 | ||
| 1 | 0 | 0.2383 | 0.4033 | 0.5249 | 0.3604 | |
| 1 | 0.3114 | 0.5239 | 0.5364 | 0.5888 | ||
| 1 | 0 | 0 | 0.2728 | 0.3540 | 0.5516 | 0.3782 |
| 1 | 0.3542 | 0.5111 | 0.5501 | 0.6056 | ||
| 1 | 0 | 0.3840 | 0.4542 | 0.7589 | 0.6470 | |
| 1 | 0.4140 | 0.5511 | 0.6838 | 0.7924 |
# Average count and reward metrics per offer-type combination.
Column_List = ['Offer Completed', 'Tot Tran Cnt', 'Trans Cnt No Offer', 'Tot Rewards Rec', 'Reward per Offer',
               'Difficulty per Offer']
# Display names for the two columns added to the legend lookup here.
Feat_Dict.update({'Offer Completed': 'Offer Completed', 'Trans Cnt No Offer':'Trans Count without Offer'})
_keys = ['BOGO offer', 'Disc offer', 'Info offer']
Group = Mean_by_Offer_Type(Column_List).set_index(_keys)
display(Group.round(4))

Colors = ['Blue', 'Orange', 'DarkGreen','Purple','LightPink', 'Crimson']
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 7), sharex=False)
_ = Group.plot.bar(ax=ax, rot=0, edgecolor='Navy', color=Colors)
_ = ax.set_title('Metric Average of Offer Type Combinations')
_legend_names = [Feat_Dict[column] for column in Column_List]
_ = ax.legend(_legend_names, loc='upper left', fontsize=12)
_ = ax.set_xlabel('(BOGO Offer, Discount Offer, Informational Offer)')
_ = ax.set_ylim([0, 20])
_ = ax.set_yticks(np.arange(0, 21, 2))
| Offer Completed | Tot Tran Cnt | Trans Cnt No Offer | Tot Rewards Rec | Reward per Offer | Difficulty per Offer | |||
|---|---|---|---|---|---|---|---|---|
| BOGO offer | Disc offer | Info offer | ||||||
| 0 | 0 | 0 | 0.0000 | 4.4779 | 4.4779 | 1.5248 | 0.0000 | 0.0000 |
| 1 | 1.1944 | 7.5111 | 6.3168 | 1.6380 | 1.4593 | 0.0000 | ||
| 1 | 0 | 1.4905 | 9.0246 | 7.5341 | 7.1875 | 5.1920 | 9.5161 | |
| 1 | 2.6759 | 10.8667 | 8.1909 | 6.6585 | 2.5487 | 5.2188 | ||
| 1 | 0 | 0 | 1.5340 | 7.7338 | 6.1998 | 15.7631 | 10.7039 | 7.2817 |
| 1 | 2.7579 | 9.5486 | 6.7907 | 14.9018 | 5.4684 | 3.9611 | ||
| 1 | 0 | 2.9435 | 9.8640 | 6.9205 | 18.6081 | 6.5097 | 8.8200 | |
| 1 | 3.9506 | 11.2950 | 7.3444 | 16.6150 | 4.2396 | 6.2400 |
# Average transaction-amount metrics per offer-type combination.
Column_List = [ 'Tot Tran Amnt', 'Offer Trans Amnt', 'Tran Amnt per Offer', 'Ave Tran Amnt', 'Tran Amnt No Offer']
Feat_Dict.update({'Tran Amnt No Offer': 'Transaction Amount without Offer'})
Group = Mean_by_Offer_Type (Column_List).set_index(['BOGO offer','Disc offer','Info offer'])
display(Group.round(4))
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 7), sharex=False)
# sns.set_palette returns None, so it must not be used as the value of the
# `color` argument. The colours are passed to the plot explicitly instead.
Bar_Colors = ["#9b59b6", "#3498db", "#e74c3c", "#34495e","#2ecc71","#95a5a6"]
# Kept as a separate statement: the original call also changed the global
# default colour cycle, which later cells may rely on.
sns.set_palette(Bar_Colors)
_ = Group.plot.bar(ax = ax, rot = 0, edgecolor = 'k', color = Bar_Colors)
_ = ax.set_title('Metric Average of Offer Type Combinations')
_ = ax.legend([Feat_Dict[x] for x in Column_List], loc='upper left', fontsize = 12)
_ = ax.set_xlabel('(BOGO Offer, Discount Offer, Informational Offer)')
_ = ax.set_ylim([0,200])
_ = ax.set_yticks(np.arange(0, 201, 20))
| Tot Tran Amnt | Offer Trans Amnt | Tran Amnt per Offer | Ave Tran Amnt | Tran Amnt No Offer | |||
|---|---|---|---|---|---|---|---|
| BOGO offer | Disc offer | Info offer | |||||
| 0 | 0 | 0 | 26.2267 | 0.0000 | 0.0000 | 6.1674 | 26.2267 |
| 1 | 46.7299 | 14.4213 | 11.4998 | 7.2478 | 32.3086 | ||
| 1 | 0 | 95.1184 | 29.5337 | 19.3329 | 13.3275 | 65.5847 | |
| 1 | 115.7472 | 52.0649 | 18.9348 | 12.5913 | 63.6824 | ||
| 1 | 0 | 0 | 120.0015 | 32.7871 | 21.4729 | 17.6009 | 87.2144 |
| 1 | 146.2316 | 66.4001 | 23.5046 | 16.7318 | 79.8316 | ||
| 1 | 0 | 175.0637 | 66.7588 | 22.2873 | 19.7209 | 108.3049 | |
| 1 | 187.4890 | 93.1559 | 23.3329 | 17.9300 | 94.3331 |
We can see from the above plot that the customers who have taken advantage of all three offer types not only spend the most with offers but also have the highest total transaction amount. In other words, the greater the incentivization, the more is spent.
The BOGO offer is the leading offer in terms of having a higher total transaction amount than discount and informational offers. Besides, customers tend to spend more than half of the total transaction amount without offers while using BOGO and discount offers.
We would like to know how the overall offer completion rate, the transaction amount ratio motivated by an offer, and the reward per offer are affected by user demographics.
def PlotTable(Feat1, Feat2, Inp = User_Data):
    """Return the per-``Feat1`` averages of ``Feat2`` for female and for male customers.

    Feat1 : column to group by.
    Feat2 : column whose mean is computed within each group.
    Inp   : DataFrame with indicator columns 'Gender Female' and
            'Gender Male' (rows are selected where the indicator equals 1).

    Returns a tuple ``(Female_Avg, Male_Avg)``, in that order; each element
    is indexed by the values of ``Feat1``.
    """
    # Select Feat2 before averaging, so that only that column is aggregated
    # and unrelated non-numeric columns cannot break the mean. Use Inp rather
    # than the global table, so that the parameter is honoured.
    Female_Avg = Inp[Inp['Gender Female'] == 1].groupby(Feat1)[Feat2].mean()
    Male_Avg = Inp[Inp['Gender Male'] == 1].groupby(Feat1)[Feat2].mean()
    return Female_Avg, Male_Avg
# 3x3 grid of scatter plots: one row per demographic feature, one column per metric.
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(14, 13.5))
ax = ax.ravel()
R = ['Offer Comp Rec Ratio', 'Offer Trans Amnt Ratio', 'Reward per Offer']
# (grouping column, title suffix, upper x-axis limit) for each row of the grid.
Row_Specs = [('Member Tenure', 'By Membership Tenure', 70),
             ('Income', 'By Income', 1.5e5),
             ('Age', 'By Age', 120)]
for row, (feature, title_suffix, x_max) in enumerate(Row_Specs):
    for col, metric in enumerate(R):
        panel = ax[3 * row + col]
        # PlotTable returns (female averages, male averages), in that order.
        # Unpacking them as "M, F" swapped the gender labels in the plots.
        F, M = PlotTable(feature, metric)
        _ = panel.scatter(F.index, F, label='Female', color = 'tomato', edgecolor = 'darkred')
        _ = panel.scatter(M.index, M, label='Male', color = 'steelblue', edgecolor = 'navy')
        _ = panel.set_xlabel(feature, fontsize = 11)
        _ = panel.set_ylabel('Avg ' + Feat_Dict[metric], fontsize = 11)
        _ = panel.set_title('Avg ' + Feat_Dict[metric] + '\n' + title_suffix, fontsize = 11)
        _ = panel.legend(loc='lower right')
        _ = panel.set_xlim(0, x_max)
        # The y axis runs from 0 to the larger group maximum, rounded up.
        _ = panel.set_ylim(0, np.ceil(max(M.max(),F.max())))
_ = plt.tight_layout()
# Conversion rate per offer type: completed offers divided by received offers.
User_Data['BOGO Conv Rate'] = User_Data['BOGO comp']/User_Data['BOGO Offer Rec']
User_Data['Disc Conv Rate'] = User_Data['Disc comp']/User_Data['Disc Offer Rec']
User_Data['Info Conv Rate'] = User_Data['Info comp']/User_Data['Info Offer Rec']
# One row per rate (in the order BOGO, Disc, Info) with a single 'mean' column.
Group = User_Data[['BOGO Conv Rate', 'Disc Conv Rate','Info Conv Rate']].agg(['mean']).T
# Display names used by legends and axis labels.
Feat_Dict.update({'BOGO Conv Rate': 'BOGO Offer Conversion Rate',
                  'Disc Conv Rate': 'Discount Offer Conversion Rate',
                  'Info Conv Rate': 'Information Offer Conversion Rate'})

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 6), sharex=False)
_ = Group.plot.bar(ax = ax[0], legend = False, color= 'mediumorchid', edgecolor='indigo', hatch = '///', fontsize = 14,rot=0)
_ = ax[0].set_xticks(np.arange(3))
# Tick labels follow the row order of Group (BOGO, Discount, Information).
# The previous labels listed Information before Discount and so mislabelled
# the second and third bars.
_ = ax[0].set_xticklabels(['BOGO Offer\nConversion Rate', 'Discount Offer\nConversion Rate',
                           'Information Offer\nConversion Rate'])
_ = ax[0].set_title('Ave Conversion Rate by Offer', fontsize = 14)
_ = ax[0].set_xlabel('Average', fontsize = 14)
_ = ax[0].set_ylim([0, .5])
_ = ax[0].set_yticks(np.arange(0, .55, 0.05))

# sns.set_palette returns None, so it must not be used as the `colors`
# argument. The slice colours are passed explicitly instead.
Pie_Colors = ['lightskyblue','limegreen','lightsalmon']
# Kept as a separate statement: the original call also changed the global
# default colour cycle, which later cells may rely on.
sns.set_palette(Pie_Colors)
_ = Group.plot.pie(ax = ax[1], y= 'mean', startangle=90, label = '', labels = None,
                   colors = Pie_Colors,
                   legend=True, autopct='%1.1f%%', fontsize=14,
                   pctdistance=0.85, explode = (0.05,0.05,0.05))
_ = ax[1].legend(bbox_to_anchor=(.25, 1), labels= [Feat_Dict[x] for x in Group.index], fontsize = 12)
# A white disc over the centre turns the pie into a ring chart.
_ = ax[1].add_artist(plt.Circle((0,0),0.70,fc='white'))
def PlotTable(Feat1, Feat2, Inp = User_Data):
    """Return the per-``Feat1`` averages of ``Feat2`` for female and for male customers.

    Feat1 : column to group by.
    Feat2 : column whose mean is computed within each group.
    Inp   : DataFrame with indicator columns 'Gender Female' and
            'Gender Male' (rows are selected where the indicator equals 1).

    Returns a tuple ``(Female_Avg, Male_Avg)``, in that order; each element
    is indexed by the values of ``Feat1``.
    """
    # Select Feat2 before averaging, so that only that column is aggregated
    # and unrelated non-numeric columns cannot break the mean. Use Inp rather
    # than the global table, so that the parameter is honoured.
    Female_Avg = Inp[Inp['Gender Female'] == 1].groupby(Feat1)[Feat2].mean()
    Male_Avg = Inp[Inp['Gender Male'] == 1].groupby(Feat1)[Feat2].mean()
    return Female_Avg, Male_Avg
# 3x3 grid of scatter plots: one row per demographic feature, one column per
# conversion rate.
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(14, 13.5))
ax = ax.ravel()
R = ['BOGO Conv Rate', 'Disc Conv Rate', 'Info Conv Rate']
# (grouping column, title suffix, upper x-axis limit) for each row of the grid.
Row_Specs = [('Member Tenure', 'By Membership Tenure', 70),
             ('Income', 'By Income', 1.5e5),
             ('Age', 'By Age', 120)]
for row, (feature, title_suffix, x_max) in enumerate(Row_Specs):
    for col, metric in enumerate(R):
        panel = ax[3 * row + col]
        # PlotTable returns (female averages, male averages), in that order.
        # Unpacking them as "M, F" swapped the gender labels in the plots.
        F, M = PlotTable(feature, metric)
        _ = panel.scatter(F.index, F, label='Female', color = 'tomato', edgecolor = 'darkred')
        _ = panel.scatter(M.index, M, label='Male', color = 'steelblue', edgecolor = 'navy')
        _ = panel.set_xlabel(feature, fontsize = 11)
        _ = panel.set_ylabel('Avg ' + Feat_Dict[metric], fontsize = 11)
        _ = panel.set_title('Avg ' + Feat_Dict[metric] + '\n' + title_suffix, fontsize = 11)
        _ = panel.legend(loc='lower right')
        _ = panel.set_xlim(0, x_max)
        # The y axis runs from 0 to the larger group maximum, rounded up.
        _ = panel.set_ylim(0, np.ceil(max(M.max(),F.max())))
_ = plt.tight_layout()